bike <- read.csv('C:/Users/Jai Katariya/Desktop/Jai Katariya/Self Learning/R/Course 2-R-Course-HTML-Notes/R-Course-HTML-Notes/R-for-Data-Science-and-Machine-Learning/Training Exercises/Machine Learning Projects/CSV files for ML Projects/bikeshare.csv')
bike <- as.data.frame(bike)
head(bike)
str(bike)
'data.frame':   10886 obs. of  12 variables:
 $ datetime  : Factor w/ 10886 levels "2011-01-01 00:00:00",..: 1 2 3 4 5 6 7 8 9 10 ...
 $ season    : int  1 1 1 1 1 1 1 1 1 1 ...
 $ holiday   : int  0 0 0 0 0 0 0 0 0 0 ...
 $ workingday: int  0 0 0 0 0 0 0 0 0 0 ...
 $ weather   : int  1 1 1 1 1 2 1 1 1 1 ...
 $ temp      : num  9.84 9.02 9.02 9.84 9.84 ...
 $ atemp     : num  14.4 13.6 13.6 14.4 14.4 ...
 $ humidity  : int  81 80 80 75 75 75 80 86 75 76 ...
 $ windspeed : num  0 0 0 0 0 ...
 $ casual    : int  3 8 5 3 0 0 2 1 1 8 ...
 $ registered: int  13 32 27 10 1 1 0 2 7 6 ...
 $ count     : int  16 40 32 13 1 1 2 3 8 14 ...
summary(bike)
                datetime         season         holiday          workingday        weather           temp           atemp          humidity     
 2011-01-01 00:00:00:    1   Min.   :1.000   Min.   :0.00000   Min.   :0.0000   Min.   :1.000   Min.   : 0.82   Min.   : 0.76   Min.   :  0.00  
 2011-01-01 01:00:00:    1   1st Qu.:2.000   1st Qu.:0.00000   1st Qu.:0.0000   1st Qu.:1.000   1st Qu.:13.94   1st Qu.:16.66   1st Qu.: 47.00  
 2011-01-01 02:00:00:    1   Median :3.000   Median :0.00000   Median :1.0000   Median :1.000   Median :20.50   Median :24.24   Median : 62.00  
 2011-01-01 03:00:00:    1   Mean   :2.507   Mean   :0.02857   Mean   :0.6809   Mean   :1.418   Mean   :20.23   Mean   :23.66   Mean   : 61.89  
 2011-01-01 04:00:00:    1   3rd Qu.:4.000   3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:2.000   3rd Qu.:26.24   3rd Qu.:31.06   3rd Qu.: 77.00  
 2011-01-01 05:00:00:    1   Max.   :4.000   Max.   :1.00000   Max.   :1.0000   Max.   :4.000   Max.   :41.00   Max.   :45.45   Max.   :100.00  
 (Other)            :10880                                                                                                                      
   windspeed          casual         registered        count      
 Min.   : 0.000   Min.   :  0.00   Min.   :  0.0   Min.   :  1.0  
 1st Qu.: 7.002   1st Qu.:  4.00   1st Qu.: 36.0   1st Qu.: 42.0  
 Median :12.998   Median : 17.00   Median :118.0   Median :145.0  
 Mean   :12.799   Mean   : 36.02   Mean   :155.6   Mean   :191.6  
 3rd Qu.:16.998   3rd Qu.: 49.00   3rd Qu.:222.0   3rd Qu.:284.0  
 Max.   :56.997   Max.   :367.00   Max.   :886.0   Max.   :977.0  
                                                                  
tail(bike)

Exploratory Data Analysis¶

library(Amelia)
missmap(bike, main = 'missing map', col = c('yellow', 'black'), legend =F)

Create a scatter plot of count vs temp.

library(ggplot2)
temp_scatterplot <- ggplot(bike, aes(temp, count)) +
  
                    geom_point(aes(color = temp),alpha = 0.2)+ 
  
                    xlab('Temperature') + ylab("Count of Rentals")
print(temp_scatterplot)

Plot count versus datetime as a scatterplot with a color gradient based on temperature. Convert the datetime column into POSIXct before plotting.

bike$datetime <- as.POSIXct(bike$datetime)
datetime_scatterplot <- ggplot(bike, aes(datetime, count)) + 
  
                        geom_point(alpha = 0.3, aes(color = temp))+
  
                        scale_color_gradient(low = 'green', high = 'red') +
  
                        xlab('Time') + ylab("Count of Rentals")
print(datetime_scatterplot)

cor_tempVscount <- cor(bike[, c('temp', 'count')])
print(cor_tempVscount)
           temp     count
temp  1.0000000 0.3944536
count 0.3944536 1.0000000

Created a boxplot to explore the season data with the y axis indicating count and the x axis begin a box for each season.

seasons <- ggplot(bike, aes(factor(season), count)) + geom_boxplot(aes(color = factor(season))) + xlab('Season') + ylab("Count of Rentals")
print(seasons)

Feature Engineering

Before dealing with date time column, we need to feature it.

Created an “hour” column that takes the hour from the datetime column.

bike$hour <- sapply(bike$datetime, function(x){format(x, '%H')})
head(bike)

Now create a scatterplot of count versus hour, with color scale based on temp. Only use bike data where workingday==1. Additions:Used the additional layer: scale_color_gradientn(colors=c(‘color1’,color2,etc..)) where the colors argument is a vector gradient of colors you choose, not just high and low. Used position=position_jitter(w=1, h=0) inside of geom_point() and check out what it does.

bike_data <- subset(bike, bike$workingday == 1)
hour_scatterplot <- ggplot(bike_data,aes(hour, count)) + geom_point(aes(color = temp), 
                    
                    position =  position_jitter(w = 1, h=0), alpha = 0.5)+ scale_color_gradientn(colors= c('dark blue', 'blue', 'light green', 'yellow', 'orange', 'red')) + xlab('Hour') + ylab("Count of Rentals")
print(hour_scatterplot)

library(dplyr)
hour2_scatterplot <- ggplot(filter(bike, workingday ==0), aes(hour, count)) + 
                    geom_point(aes(color = temp), position = position_jitter(w =1, h = 0), 
                    alpha = 0.5) + scale_color_gradientn(colors= c('dark blue', 'blue', 
                    'light green', 'yellow', 'orange', 'red')) + xlab('Hour') + ylab("Count of Rentals")
print(hour2_scatterplot)

Compare Count of Rentals with Casual and Registered.

Ca_scatterplot <- ggplot(bike,aes(hour, casual)) + geom_point(colour ='Red') + xlab('Hour') + ylab("Count of Casual Rentals")
print(Ca_scatterplot)

Re_scatterplot <- ggplot(bike,aes(hour, registered)) + geom_point(colour ='blue') + xlab('Hour') + ylab("Count of Registered Rentals")
print(Re_scatterplot)

For an interactive plot, I installed plotly library.

#install.packages('plotly')
library(ggplot2)
library(plotly)
CountVsTemp <- ggplot(bike, aes(temp, count)) + geom_point(aes(color = temp)) + xlab("Temperature") + ylab("Count of Rentals")
gpl <- ggplotly(CountVsTemp)
We recommend that you use the dev version of ggplot2 with `ggplotly()`
Install it with: `devtools::install_github('hadley/ggplot2')`
print(gpl)

NULL
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCmBgYHtyfQ0KYmlrZSA8LSByZWFkLmNzdignQzovVXNlcnMvSmFpIEthdGFyaXlhL0Rlc2t0b3AvSmFpIEthdGFyaXlhL1NlbGYgTGVhcm5pbmcvUi9Db3Vyc2UgMi1SLUNvdXJzZS1IVE1MLU5vdGVzL1ItQ291cnNlLUhUTUwtTm90ZXMvUi1mb3ItRGF0YS1TY2llbmNlLWFuZC1NYWNoaW5lLUxlYXJuaW5nL1RyYWluaW5nIEV4ZXJjaXNlcy9NYWNoaW5lIExlYXJuaW5nIFByb2plY3RzL0NTViBmaWxlcyBmb3IgTUwgUHJvamVjdHMvYmlrZXNoYXJlLmNzdicpDQpiaWtlIDwtIGFzLmRhdGEuZnJhbWUoYmlrZSkNCmhlYWQoYmlrZSkNCmBgYA0KYGBge3J9DQpzdHIoYmlrZSkNCmBgYA0KDQpgYGB7cn0NCnN1bW1hcnkoYmlrZSkNCmBgYA0KDQpgYGB7cn0NCnRhaWwoYmlrZSkNCmBgYA0KDQpFeHBsb3JhdG9yeSBEYXRhIEFuYWx5c2lztg0KYGBge3J9DQoNCmxpYnJhcnkoQW1lbGlhKQ0KbWlzc21hcChiaWtlLCBtYWluID0gJ21pc3NpbmcgbWFwJywgY29sID0gYygneWVsbG93JywgJ2JsYWNrJyksIGxlZ2VuZCA9RikNCg0KYGBgDQoNCiBDcmVhdGUgYSBzY2F0dGVyIHBsb3Qgb2YgY291bnQgdnMgdGVtcC4NCiANCmBgYHtyfQ0KDQpsaWJyYXJ5KGdncGxvdDIpDQoNCnRlbXBfc2NhdHRlcnBsb3QgPC0gZ2dwbG90KGJpa2UsIGFlcyh0ZW1wLCBjb3VudCkpICsNCiAgDQogICAgICAgICAgICAgICAgICAgIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gdGVtcCksYWxwaGEgPSAwLjIpKyANCiAgDQogICAgICAgICAgICAgICAgICAgIHhsYWIoJ1RlbXBlcmF0dXJlJykgKyB5bGFiKCJDb3VudCBvZiBSZW50YWxzIikNCg0KcHJpbnQodGVtcF9zY2F0dGVycGxvdCkNCg0KYGBgDQpQbG90IGNvdW50IHZlcnN1cyBkYXRldGltZSBhcyBhIHNjYXR0ZXJwbG90IHdpdGggYSBjb2xvciBncmFkaWVudCBiYXNlZCBvbiB0ZW1wZXJhdHVyZS4gDQpDb252ZXJ0IHRoZSBkYXRldGltZSBjb2x1bW4gaW50byBQT1NJWGN0IGJlZm9yZSBwbG90dGluZy4NCg0KYGBge3J9DQoNCmJpa2UkZGF0ZXRpbWUgPC0gYXMuUE9TSVhjdChiaWtlJGRhdGV0aW1lKQ0KDQpkYXRldGltZV9zY2F0dGVycGxvdCA8LSBnZ3Bsb3QoYmlrZSwgYWVzKGRhdGV0aW1lLCBjb3VudCkpICsgDQogIA0KICAgICAgICAgICAgICAgICAgICAgICAgZ2VvbV9wb2ludChhbHBoYSA9IDAuMywgYWVzKGNvbG9yID0gdGVtcCkpKw0KICANCiAgICAgICAgICAgICAgICAgICAgICAgIHNjYWxlX2NvbG9yX2dyYWRpZW50KGxvdyA9ICdncmVlbicsIGhpZ2ggPSAncmVkJykgKw0KICANCiAgICAgICAgICAgICAgICAgICAgICAgIHhsYWIoJ1RpbWUnKSArIHlsYWIoIkNvdW50IG9mIFJlbnRhbHMiKQ0KDQpwcmludChkYXRldGltZV9zY2F0dGVycGxvdCkNCg0KYGBgDQpgYGB7cn0NCg0KY29yX3RlbXBWc2NvdW50IDwtIGNvcihiaWtlWywgYygndGVtcCcsICdjb3VudCcpXSkNCnByaW50KGNvcl90ZW1wVnNjb3VudCkNCg0KYGBgDQoNCkNyZWF0ZWQgYSBib3hwbG90IHRvIGV4cGxvcmUgdGhlIHNlYXNvbiBkYXRhIHdpdGggdGhlIHkgYXhpcyBpbmRpY2F0aW5nIGNvdW50IGFuZCB0aGUgeCBheGlzIGJlZ2luIGEgYm94IGZvciBlYWNoIHNlYXNvbi4NCg0KYGBge3J9DQoNCnNlYXNvbnMgPC0gZ2dwbG90KGJpa2UsIGFlcyhmYWN0b3Ioc2Vhc29uKSwgY291bnQpKSArIGdlb21fYm94cGxvdChhZXMoY29sb3IgPSBmYWN0b3Ioc2Vhc29uKSkpICsgeGxhYignU2Vhc29uJykgKyB5bGFiKCJDb3VudCBvZiBSZW50YWxzIikNCg0KcHJpbnQoc2Vhc29ucykNCg0KYGBgDQoNCkZlYXR1cmUgRW5naW5lZXJpbmcNCg0KQmVmb3JlIGRlYWxpbmcgd2l0aCBkYXRlIHRpbWUgY29sdW1uLCB3ZSBuZWVkIHRvIGZlYXR1cmUgaXQuDQoNCkNyZWF0ZWQgYW4gImhvdXIiIGNvbHVtbiB0aGF0IHRha2VzIHRoZSBob3VyIGZyb20gdGhlIGRhdGV0aW1lIGNvbHVtbi4NCg0KYGBge3J9DQoNCmJpa2UkaG91ciA8LSBzYXBwbHkoYmlrZSRkYXRldGltZSwgZnVuY3Rpb24oeCl7Zm9ybWF0KHgsICclSCcpfSkNCmhlYWQoYmlrZSkNCmBgYA0KTm93IGNyZWF0ZSBhIHNjYXR0ZXJwbG90IG9mIGNvdW50IHZlcnN1cyBob3VyLCB3aXRoIGNvbG9yIHNjYWxlIGJhc2VkIG9uIHRlbXAuIE9ubHkgdXNlIGJpa2UgZGF0YSB3aGVyZSB3b3JraW5nZGF5PT0xLg0KQWRkaXRpb25zOlVzZWQgdGhlIGFkZGl0aW9uYWwgbGF5ZXI6IHNjYWxlX2NvbG9yX2dyYWRpZW50bihjb2xvcnM9YygnY29sb3IxJyxjb2xvcjIsZXRjLi4pKSB3aGVyZSB0aGUgY29sb3JzIGFyZ3VtZW50IGlzIGEgdmVjdG9yIGdyYWRpZW50IG9mIGNvbG9ycyB5b3UgY2hvb3NlLCBub3QganVzdCBoaWdoIGFuZCBsb3cuIA0KVXNlZCBwb3NpdGlvbj1wb3NpdGlvbl9qaXR0ZXIodz0xLCBoPTApIGluc2lkZSBvZiBnZW9tX3BvaW50KCkgYW5kIGNoZWNrIG91dCB3aGF0IGl0IGRvZXMuDQoNCmBgYHtyfQ0KDQpiaWtlX2RhdGEgPC0gc3Vic2V0KGJpa2UsIGJpa2Ukd29ya2luZ2RheSA9PSAxKQ0KDQpob3VyX3NjYXR0ZXJwbG90IDwtIGdncGxvdChiaWtlX2RhdGEsYWVzKGhvdXIsIGNvdW50KSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvciA9IHRlbXApLCANCiAgICAgICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICAgIHBvc2l0aW9uID0gIHBvc2l0aW9uX2ppdHRlcih3ID0gMSwgaD0wKSwgYWxwaGEgPSAwLjUpKyBzY2FsZV9jb2xvcl9ncmFkaWVudG4oY29sb3JzPSBjKCdkYXJrIGJsdWUnLCAnYmx1ZScsICdsaWdodCBncmVlbicsICd5ZWxsb3cnLCAnb3JhbmdlJywgJ3JlZCcpKSArIHhsYWIoJ0hvdXInKSArIHlsYWIoIkNvdW50IG9mIFJlbnRhbHMiKQ0KDQpwcmludChob3VyX3NjYXR0ZXJwbG90KQ0KDQpgYGANCg0KYGBge3J9DQoNCmxpYnJhcnkoZHBseXIpDQpob3VyMl9zY2F0dGVycGxvdCA8LSBnZ3Bsb3QoZmlsdGVyKGJpa2UsIHdvcmtpbmdkYXkgPT0wKSwgYWVzKGhvdXIsIGNvdW50KSkgKyANCiAgDQogICAgICAgICAgICAgICAgICAgIGdlb21fcG9pbnQoYWVzKGNvbG9yID0gdGVtcCksIHBvc2l0aW9uID0gcG9zaXRpb25faml0dGVyKHcgPTEsIGggPSAwKSwgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICAgIGFscGhhID0gMC41KSArIHNjYWxlX2NvbG9yX2dyYWRpZW50bihjb2xvcnM9IGMoJ2RhcmsgYmx1ZScsICdibHVlJywgDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgDQogICAgICAgICAgICAgICAgICAgICdsaWdodCBncmVlbicsICd5ZWxsb3cnLCAnb3JhbmdlJywgJ3JlZCcpKSArIHhsYWIoJ0hvdXInKSArIHlsYWIoIkNvdW50IG9mIFJlbnRhbHMiKQ0KDQpwcmludChob3VyMl9zY2F0dGVycGxvdCkNCg0KYGBgDQoNCkNvbXBhcmUgQ291bnQgb2YgUmVudGFscyB3aXRoIENhc3VhbCBhbmQgUmVnaXN0ZXJlZC4NCg0KYGBge3J9DQoNCkNhX3NjYXR0ZXJwbG90IDwtIGdncGxvdChiaWtlLGFlcyhob3VyLCBjYXN1YWwpKSArIGdlb21fcG9pbnQoY29sb3VyID0nUmVkJykgKyB4bGFiKCdIb3VyJykgKyB5bGFiKCJDb3VudCBvZiBDYXN1YWwgUmVudGFscyIpDQpwcmludChDYV9zY2F0dGVycGxvdCkNCg0KYGBgDQpgYGB7cn0NCg0KUmVfc2NhdHRlcnBsb3QgPC0gZ2dwbG90KGJpa2UsYWVzKGhvdXIsIHJlZ2lzdGVyZWQpKSArIGdlb21fcG9pbnQoY29sb3VyID0nYmx1ZScpICsgeGxhYignSG91cicpICsgeWxhYigiQ291bnQgb2YgUmVnaXN0ZXJlZCBSZW50YWxzIikNCnByaW50KFJlX3NjYXR0ZXJwbG90KQ0KDQpgYGANCg0KRm9yIGFuIGludGVyYWN0aXZlIHBsb3QsIEkgaW5zdGFsbGVkIHBsb3RseSBsaWJyYXJ5LiANCg0KYGBge3J9DQojaW5zdGFsbC5wYWNrYWdlcygncGxvdGx5JykNCmxpYnJhcnkoZ2dwbG90MikNCmxpYnJhcnkocGxvdGx5KQ0KDQpDb3VudFZzVGVtcCA8LSBnZ3Bsb3QoYmlrZSwgYWVzKHRlbXAsIGNvdW50KSkgKyBnZW9tX3BvaW50KGFlcyhjb2xvciA9IHRlbXApKSArIHhsYWIoIlRlbXBlcmF0dXJlIikgKyB5bGFiKCJDb3VudCBvZiBSZW50YWxzIikNCg0KZ3BsIDwtIGdncGxvdGx5KENvdW50VnNUZW1wKQ0KDQpwcmludChncGwpDQpgYGANCg0K